import requests,re,json
from lxml import etree
import fake_useragent.fake

# Provider of User-Agent header values (read via `ua.random`).
ua = fake_useragent.UserAgent()

# Search-result URL on krdict.korean.go.kr with every query parameter fixed
# except the page number, which callers append to the end of this string.
r_u = (
    "https://krdict.korean.go.kr/chn/dicSearchDetail/searchDetailWordsResult"
    "?nation=chn&nationCode=11&searchFlag=Y&sort=1&blockCount=100"
    "&currentPage="
)

class Korean_Dict:
    """Download search-result pages from krdict.korean.go.kr."""

    def get_word(self, words_url, timeout=10):
        """Fetch one result page and return its body decoded as UTF-8.

        Args:
            words_url: Full URL of the result page to download.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the crawl forever.

        Returns:
            The response body as text.

        Raises:
            requests.RequestException: If the connection fails or times out.
        """
        # The context manager closes the session's pooled connections even
        # when the request raises.
        with requests.Session() as session:
            this_page = session.get(words_url, timeout=timeout)
            # Force UTF-8 instead of the encoding guessed from the headers.
            this_page.encoding = "utf-8"
            return this_page.text

    def main(self):
        """Request result pages by appending a page number to ``r_u``."""
        for i in range(1, 521):
            words_url = r_u + str(i)
            # The returned page text is not processed yet.
            self.get_word(words_url)
            # NOTE(review): only the first page is fetched; this looks like a
            # debugging limit -- remove the break to crawl pages 1-520.
            break

# Script entry point: run the crawler only when executed directly.
if __name__ == '__main__':
    Korean_Dict().main()

# My simplified version of the search URL (example for page 2):
# https://krdict.korean.go.kr/chn/dicSearchDetail/searchDetailWordsResult?nation=chn&nationCode=11&searchFlag=Y&sort=1&currentPage=2&blockCount=100

# Optional extra argument for session.get(...) to send a random User-Agent
# (currently not passed):  ,headers={"User-Agent":ua.random}


# Open problem: a single entry (headword) can have several definitions.
# XPath of the entry (headword), one <dl> per result:
#     //*[@id="container"]/div/div[4]/form/div[1]/div[4]/dl[1]/dt/a[1]/span/text()
#     //*[@id="container"]/div/div[4]/form/div[1]/div[4]/dl[2]/dt/a[1]/span/text()
#     //*[@id="container"]/div/div[4]/form/div[1]/div[4]/dl[3]/dt/a[1]/span/text()
#     //*[@id="container"]/div/div[4]/form/div[1]/div[4]/dl[28]/dt/a[1]/span/text()

# XPath of the part of speech:
#     //*[@id="container"]/div/div[4]/form/div[1]/div[4]/dl[28]/dt/span[2]/span/text()
#     //*[@id="container"]/div/div[4]/form/div[1]/div[4]/dl[11]/dt/span[2]/span/text()

# XPath of the Korean definition:
#     //*[@id="container"]/div/div[4]/form/div[1]/div[4]/dl[18]/dd[2]/text()
#